In [1]:
%pylab
%matplotlib inline


Using matplotlib backend: TkAgg
Populating the interactive namespace from numpy and matplotlib

In [2]:
cd ..


/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-net-work

In [3]:
import sys
import numpy as np
import skimage
import cv2
import sklearn
import imp

In [4]:
from holoviews import *


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-4-008fb02dab24> in <module>()
----> 1 from holoviews import *

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/__init__.py in <module>()
      8                             commit="$Format:%h$", reponame='holoviews')
      9 
---> 10 from .core.dimension import Dimension         # pyflakes:ignore (API import)
     11 from .core.boundingregion import BoundingBox  # pyflakes:ignore (API import)
     12 from .core.layout import *                    # pyflakes:ignore (API import)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/__init__.py in <module>()
      1 from .boundingregion import *  # pyflakes:ignore (API import)
----> 2 from .dimension import *       # pyflakes:ignore (API import)
      3 from .element import *         # pyflakes:ignore (API import)
      4 from .layout import *          # pyflakes:ignore (API import)
      5 from .operation import *       # pyflakes:ignore (API import)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/dimension.py in <module>()
     10 
     11 from ..core.util import valid_identifier
---> 12 from .options import Store
     13 from .pprint import PrettyPrinter
     14 

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/options.py in <module>()
     66 
     67 
---> 68 class Cycle(param.Parameterized):
     69     """
     70     A simple container class that specifies cyclic options. A typical

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/holoviews/core/options.py in Cycle()
     78 
     79     items = param.List(default=None, allow_None=True,  doc="""
---> 80         If supplied, the explicit list of items to be cycled over.""")
     81 
     82     rckey = param.String(default='axes.color_cycle', doc="""

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/param/__init__.pyc in __init__(self, default, class_, instantiate, bounds, **params)
   1056         self.class_ = class_
   1057         self.bounds = bounds
-> 1058         self._check_bounds(default)
   1059         Parameter.__init__(self,default=default,instantiate=instantiate,
   1060                            **params)

/afs/inf.ed.ac.uk/user/s11/s1145806/Documents/git/neukrill-venv-auto/lib/python2.7/site-packages/param/__init__.pyc in _check_bounds(self, val)
   1074         """
   1075         if not (isinstance(val,list)):
-> 1076             raise ValueError("List '%s' must be a list."%(self._attrib_name))
   1077 
   1078         if self.bounds is not None:

AttributeError: _attrib_name

In [5]:
import neukrill_net.utils
import neukrill_net.highlevelfeatures

In [6]:
import time

In [7]:
settings = neukrill_net.utils.Settings('settings.json')

In [8]:
X,y = settings.flattened_train_paths(settings.classes)

In [9]:
hlf = neukrill_net.highlevelfeatures.ContourMoments()

In [10]:
t0 = time.time()
XF = hlf.transform(X)
print("Computing features took {}".format(time.time()-t0))

In [24]:
XF.shape


Out[24]:
(1, 30336, 30)

In [26]:
sklearn.externals.joblib.dump((hlf,XF,y),'cache/contourmoments.pkl')


Out[26]:
['cache/contourmoments.pkl', 'cache/contourmoments.pkl_01.npy']

Naive Bayes


In [11]:
import sklearn.naive_bayes

In [12]:
clf = sklearn.naive_bayes.GaussianNB()

In [13]:
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=0.0596358776093
Accuracy=0.122428797468
Logloss=21.7494153182

Logistic Regression


In [14]:
clf = sklearn.linear_model.LogisticRegression(random_state=42)

In [15]:
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=51.0716290474
Accuracy=0.295424578059
Logloss=2.83966800982

Random Forest


In [18]:
import sklearn.ensemble

In [19]:
clf = sklearn.ensemble.RandomForestClassifier(n_estimators=1000, max_depth=20, min_samples_leaf=5)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=142.819734097
Accuracy=0.372824367089
Logloss=2.55767029422

Linear SVC


In [23]:
clf = sklearn.svm.SVC(kernel='linear', probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=72.912648201
Accuracy=0.339135021097
Logloss=2.64244706442

Non-linear SVC

one-vs-one


In [22]:
clf = sklearn.svm.SVC(probability=True, random_state=42)

t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=92.4036419392
Accuracy=0.294765295359
Logloss=2.71083425129

What if we add the Haralick features to this?


In [16]:
hlf2 = neukrill_net.highlevelfeatures.Haralick()

In [17]:
hlf_ = hlf+hlf2

In [18]:
XF_ = hlf_.transform(X)

In [19]:
XF_.shape


Out[19]:
(1, 30336, 56)

In [20]:
t0 = time.time()
X_train, X_test, y_train, y_test = sklearn.cross_validation.train_test_split(
    sklearn.preprocessing.StandardScaler().fit_transform(XF_.squeeze(0)), y, test_size=0.5, random_state=42)
clf.fit(X_train, y_train)

t1 = time.time()
total = t1-t0
print("Time={}".format(total))

print("Accuracy={}".format(clf.score(X_test, y_test)))
print("Logloss={}".format(sklearn.metrics.log_loss(y_test, clf.predict_proba(X_test))))


Time=91.0343399048
Accuracy=0.493934599156
Logloss=1.96769402153

In [ ]: